﻿/* 
%include "/projects/hsieh_project/proj_201809/code_1_data/data_0_ind_sum_core.sas" /source2;

Macro that aggregates data to the industry level.

dt_in = name of input data
dt_out = name of output data
c_city = variable for city/geo
l_perc= list of percentiles to create statistics for (e.g. top 10 percentile ind-firms defined based on employment)
*/

/*
================================================================================
Calculate statistics at industry level, with no reference to city
*/

%macro ind_sum(dt_in= , dt_out= , l_perc= );

%put Top Industry Firms;
%put Input &dt_in.;
%put Output &dt_out.;
%put Percentile &l_perc.;

data dt_proc;
  set &dt_in.(keep=year msa fips ch_ind sector firmnum lbdid worker est salary indf_rank indf_perc runif);
run;

/*
--------------------------------------------------------------------------------
Aggregate to year-industry-firm level
*/
proc sort data=dt_proc;
  by year sector ch_ind firmnum indf_rank indf_perc;
run;

proc means data=dt_proc noprint;
  by year sector ch_ind firmnum indf_rank indf_perc;
  output out=dt_indf(drop=_type_ _freq_) sum(worker est salary)=emp_ind est_ind pay_ind mean(runif)=runif;
run;

data dt_indf;
  set dt_indf;
  n_ind=1;
  emp2_indf = emp_ind**2; /* For HHI */
run;

/*
--------------------------------------------------------------------------------
Create variables related to top firms
e.g. employment of top 10% industry-firms (percentile based on employment and defined at industry level)
*/

%Let l_ind_var=emp_ind est_ind pay_ind n_ind emp2_indf;

%Let i_list = 1;
%do %while (%scan(%bquote(&l_perc), &i_list) ~= );
  %Let i_perc=%scan(%bquote(&l_perc), &i_list.);
  %put Top &i_perc.% Industry Firms;
  
  data dt_indf;
    set dt_indf;
    emp_ind_&i_perc. = 0;
    est_ind_&i_perc. = 0;
    pay_ind_&i_perc. = 0;
    n_ind_&i_perc. = 0;
  run;
  
  data dt_indf;
    set dt_indf;
    if indf_perc <= %sysevalf(&i_perc. / 100) then 
      do;
	emp_ind_&i_perc. = emp_ind;
	est_ind_&i_perc. = est_ind;
	pay_ind_&i_perc. = pay_ind;
	n_ind_&i_perc. = 1;
      end;
  run;
  
  %Let l_ind_var = &l_ind_var. emp_ind_&i_perc. est_ind_&i_perc. pay_ind_&i_perc. n_ind_&i_perc.;
  %Let i_list = %eval(&i_list + 1);
%end;

%put List of Variables for Top Firms;
%put &l_ind_var.;

/*
--------------------------------------------------------------------------------
Aggregate to year-industry level
*/

proc sort data=dt_indf; by year sector ch_ind;

proc means data=dt_indf noprint;
  by year sector ch_ind;
  output out=&dt_out.(drop=_type_ _freq_) sum(&l_ind_var.)=&l_ind_var.;
run;

/* Calculate HHI */
data &dt_out.;
  set &dt_out.;
  hhi_ind = emp2_indf / (emp_ind**2);
run;

%mend;


/*
================================================================================
Calculate statistics at industry level, with reference to city
1) Total numbers of markets by industry: count number of markets for each ind-firm, and sum across ind-firms by industry.
2) Average market size by industry: calculate the average market size for each ind-firm, and take average across ind-firms by industry.
  2.1) Market size defined as total employment in city.
*/

%macro ind_sum_city(dt_in= , dt_out= , c_city= , l_perc= );

%put Top Industry Firms;
%put Input &dt_in.;
%put Output &dt_out.;
%put City &c_city.;
%put Percentile &l_perc.;

data dt_proc;
  set &dt_in.(keep=year &c_city. ch_ind sector firmnum lbdid worker est salary indf_rank indf_perc runif);
run;

/* 
--------------------------------------------------------------------------------
Aggregate to year-industry-firm-city level
*/

proc sort data=dt_proc;
  by year sector ch_ind firmnum indf_rank indf_perc &c_city.;
run;

proc means data=dt_proc noprint;
  by year sector ch_ind firmnum indf_rank indf_perc &c_city.;
  output out=dt_indf(drop=_type_ _freq_) sum(worker est salary)=worker est salary mean(runif)=runif;
run;

/*
--------------------------------------------------------------------------------
Numbers of market served by firms
*/

/* Count total number of cities for all firms */
data dt_indf;
  set dt_indf;
  ind_&c_city._tot=1;
run;

/* Count total number of cities for top firms */
%Let l_ind_var=ind_&c_city._tot;

%Let i_list = 1;
%do %while (%scan(%bquote(&l_perc), &i_list) ~= );
  %Let i_perc=%scan(%bquote(&l_perc), &i_list.);
  %put Top &i_perc.% Industry Firms;
  
  data dt_indf;
    set dt_indf;
    ind_&c_city._tot_&i_perc.=0;
    
    if indf_perc <= %sysevalf(&i_perc. / 100) then 
      do;
	ind_&c_city._tot_&i_perc.= 1;
      end;
  run;
  
  %Let l_ind_var = &l_ind_var. ind_&c_city._tot_&i_perc.;
  %Let i_list = %eval(&i_list + 1);
%end;

%put List of Variables for Top Firms;
%put &l_ind_var.;


/*
----------------------------------------
Aggregate to year-industry level
*/
proc sort data=dt_indf; by year sector ch_ind;

proc means data=dt_indf noprint;
  by year sector ch_ind;
  output out=&dt_out.(drop=_type_ _freq_) sum(&l_ind_var.)=&l_ind_var.;
run;


/*
--------------------------------------------------------------------------------
Average market size served by firms 
*/

/*
----------------------------------------
Aggregate to year-city level
dt_city = employment for each city
*/

data dt_city;
  set dt_indf;
run;

proc sort data=dt_city;
  by year &c_city.;
run;

proc means data=dt_city noprint;
  by year &c_city.;
  output out=dt_city(drop=_type_ _freq_) sum(worker)=city_emp;
run;

/*
----------------------------------------
Calculate average market size for each firm
dt_cityf = average market size for each ind-firm
*/

data dt_cityf;
  set dt_indf;
run;

proc sort data=dt_cityf;
  by year &c_city.;
run;

data dt_cityf;
  merge dt_cityf dt_city;
  by year &c_city.;
run;

proc sort data=dt_cityf;
  by year ch_ind firmnum indf_perc;
run;

proc means data=dt_cityf noprint;
  by year ch_ind firmnum indf_perc;
  output out=dt_cityf(drop=_type_ _freq_) mean(city_emp)=mkt_&c_city. sum(city_emp)=mkts_&c_city;
run;

/*
----------------------------------------
Calculate average market size by industry
dt_mkt = average of average market size across ind-firm by industry
*/

%Let l_mkt=mkt_&c_city.;
%Let l_mkts=mkts_&c_city.;

%Let i_list = 1;
%do %while (%scan(%bquote(&l_perc), &i_list) ~= );
  %Let i_perc=%scan(%bquote(&l_perc), &i_list.);
  %put Top &i_perc.% Industry Firms;
  
  data dt_cityf;
    set dt_cityf;
    mkt_&c_city._&i_perc.=.;
    
    if indf_perc <= %sysevalf(&i_perc. / 100) then 
      do;
	mkt_&c_city._&i_perc.= mkt_&c_city.;
	mkts_&c_city._&i_perc.= mkts_&c_city.;
      end;
  run;
  
  %Let l_mkt = &l_mkt. mkt_&c_city._&i_perc.;
  %Let l_mkts = &l_mkts. mkts_&c_city._&i_perc.;
  %Let i_list = %eval(&i_list + 1);
%end;

%put List of Market Variables for Top Firms;
%put &l_mkt.;
%put &l_mkts.;

proc means data=dt_cityf noprint;
  by year ch_ind;
  output out=dt_mkt(drop=_type_ _freq_) mean(&l_mkt.)=&l_mkt. sum(&l_mkts.)=&l_mkts.;
run;


/*
--------------------------------------------------------------------------------
Merge
*/

proc sort data=&dt_out.; by year ch_ind;
proc sort data=dt_mkt; by year ch_ind;

data &dt_out.;
  set &dt_out.;
  drop _type_ _freq_;
run;

data dt_mkt;
  set dt_mkt;
  drop _type_ _freq_;
run;

data &dt_out.;
  merge &dt_out. dt_mkt;
  by year ch_ind;
run;

proc sort data=&dt_out.; by year sector ch_ind;

%mend;

/* End of SAS file */
